Name: Naga Venkata Prabhu Shobith Paripalli
Registration Number: 21BAI1722
Campus: Chennai
G-mail: shobith.paripalli2021@vitstudent.ac.in
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the house-price dataset into a DataFrame.
# NOTE: the path is an absolute, machine-specific Windows location; the raw
# string keeps the backslashes literal. Adjust it when running elsewhere.
df = pd.read_csv(r"C:\Users\shobi\Downloads\archive\House Price India.csv")
# Preview the first rows to confirm the file parsed as expected.
df.head()
| | id | Date | number of bedrooms | number of bathrooms | living area | lot area | number of floors | waterfront present | number of views | condition of the house | ... | Built Year | Renovation Year | Postal Code | Lattitude | Longitude | living_area_renov | lot_area_renov | Number of schools nearby | Distance from the airport | Price |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6762810145 | 42491 | 5 | 2.50 | 3650 | 9050 | 2.0 | 0 | 4 | 5 | ... | 1921 | 0 | 122003 | 52.8645 | -114.557 | 2880 | 5400 | 2 | 58 | 2380000 |
| 1 | 6762810635 | 42491 | 4 | 2.50 | 2920 | 4000 | 1.5 | 0 | 0 | 5 | ... | 1909 | 0 | 122004 | 52.8878 | -114.470 | 2470 | 4000 | 2 | 51 | 1400000 |
| 2 | 6762810998 | 42491 | 5 | 2.75 | 2910 | 9480 | 1.5 | 0 | 0 | 3 | ... | 1939 | 0 | 122004 | 52.8852 | -114.468 | 2940 | 6600 | 1 | 53 | 1200000 |
| 3 | 6762812605 | 42491 | 4 | 2.50 | 3310 | 42998 | 2.0 | 0 | 0 | 3 | ... | 2001 | 0 | 122005 | 52.9532 | -114.321 | 3350 | 42847 | 3 | 76 | 838000 |
| 4 | 6762812919 | 42491 | 3 | 2.00 | 2710 | 4500 | 1.5 | 0 | 0 | 4 | ... | 1929 | 0 | 122006 | 52.9047 | -114.485 | 2060 | 4500 | 1 | 51 | 805000 |
5 rows × 23 columns
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()
| | id | Date | number of bedrooms | number of bathrooms | living area | lot area | number of floors | waterfront present | number of views | condition of the house | ... | Built Year | Renovation Year | Postal Code | Lattitude | Longitude | living_area_renov | lot_area_renov | Number of schools nearby | Distance from the airport | Price |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1.462000e+04 | 14620.000000 | 14620.000000 | 14620.000000 | 14620.000000 | 1.462000e+04 | 14620.000000 | 14620.000000 | 14620.000000 | 14620.000000 | ... | 14620.000000 | 14620.000000 | 14620.000000 | 14620.000000 | 14620.000000 | 14620.000000 | 14620.000000 | 14620.000000 | 14620.000000 | 1.462000e+04 |
| mean | 6.762821e+09 | 42604.538646 | 3.379343 | 2.129583 | 2098.262996 | 1.509328e+04 | 1.502360 | 0.007661 | 0.233105 | 3.430506 | ... | 1970.926402 | 90.924008 | 122033.062244 | 52.792848 | -114.404007 | 1996.702257 | 12753.500068 | 2.012244 | 64.950958 | 5.389322e+05 |
| std | 6.237575e+03 | 67.347991 | 0.938719 | 0.769934 | 928.275721 | 3.791962e+04 | 0.540239 | 0.087193 | 0.766259 | 0.664151 | ... | 29.493625 | 416.216661 | 19.082418 | 0.137522 | 0.141326 | 691.093366 | 26058.414467 | 0.817284 | 8.936008 | 3.675324e+05 |
| min | 6.762810e+09 | 42491.000000 | 1.000000 | 0.500000 | 370.000000 | 5.200000e+02 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | ... | 1900.000000 | 0.000000 | 122003.000000 | 52.385900 | -114.709000 | 460.000000 | 651.000000 | 1.000000 | 50.000000 | 7.800000e+04 |
| 25% | 6.762815e+09 | 42546.000000 | 3.000000 | 1.750000 | 1440.000000 | 5.010750e+03 | 1.000000 | 0.000000 | 0.000000 | 3.000000 | ... | 1951.000000 | 0.000000 | 122017.000000 | 52.707600 | -114.519000 | 1490.000000 | 5097.750000 | 1.000000 | 57.000000 | 3.200000e+05 |
| 50% | 6.762821e+09 | 42600.000000 | 3.000000 | 2.250000 | 1930.000000 | 7.620000e+03 | 1.500000 | 0.000000 | 0.000000 | 3.000000 | ... | 1975.000000 | 0.000000 | 122032.000000 | 52.806400 | -114.421000 | 1850.000000 | 7620.000000 | 2.000000 | 65.000000 | 4.500000e+05 |
| 75% | 6.762826e+09 | 42662.000000 | 4.000000 | 2.500000 | 2570.000000 | 1.080000e+04 | 2.000000 | 0.000000 | 0.000000 | 4.000000 | ... | 1997.000000 | 0.000000 | 122048.000000 | 52.908900 | -114.315000 | 2380.000000 | 10125.000000 | 3.000000 | 73.000000 | 6.450000e+05 |
| max | 6.762832e+09 | 42734.000000 | 33.000000 | 8.000000 | 13540.000000 | 1.074218e+06 | 3.500000 | 1.000000 | 4.000000 | 5.000000 | ... | 2015.000000 | 2015.000000 | 122072.000000 | 53.007600 | -113.505000 | 6110.000000 | 560617.000000 | 3.000000 | 80.000000 | 7.700000e+06 |
8 rows × 23 columns
# Count missing values in each column.
df.isnull().sum()
id 0 Date 0 number of bedrooms 0 number of bathrooms 0 living area 0 lot area 0 number of floors 0 waterfront present 0 number of views 0 condition of the house 0 grade of the house 0 Area of the house(excluding basement) 0 Area of the basement 0 Built Year 0 Renovation Year 0 Postal Code 0 Lattitude 0 Longitude 0 living_area_renov 0 lot_area_renov 0 Number of schools nearby 0 Distance from the airport 0 Price 0 dtype: int64
# Inspect the dtype pandas inferred for each column.
df.dtypes
id int64 Date int64 number of bedrooms int64 number of bathrooms float64 living area int64 lot area int64 number of floors float64 waterfront present int64 number of views int64 condition of the house int64 grade of the house int64 Area of the house(excluding basement) int64 Area of the basement int64 Built Year int64 Renovation Year int64 Postal Code int64 Lattitude float64 Longitude float64 living_area_renov int64 lot_area_renov int64 Number of schools nearby int64 Distance from the airport int64 Price int64 dtype: object
# Univariate Analysis
# Draw one histogram per column, each in its own figure.
for column_name, values in df.items():
    plt.hist(values)
    plt.title(column_name)
    plt.show()
# Draw one box plot per column, each in its own figure.
for column_name, values in df.items():
    plt.boxplot(values)
    plt.title(column_name)
    plt.show()
# Bivariate Analysis
# Scatter every feature against the target, one figure per feature.
target = "Price"
for feature in df.columns:
    if feature == target:
        # Price plotted against itself is just the y = x diagonal; skip it.
        continue
    plt.scatter(df[feature], df[target])
    plt.xlabel(feature)
    plt.ylabel(target)
    plt.show()
# Multivariate Analysis
# Grid of pairwise relationships between the columns of df.
# NOTE(review): df has 23 numeric columns, so this draws a 23x23 grid; it is
# likely slow and hard to read -- consider passing a subset via `vars=`.
sns.pairplot(df)
<seaborn.axisgrid.PairGrid at 0x27b81bd0640>